<?xml version="1.0" encoding="utf-8"?>
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN" "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
<html xmlns="http://www.w3.org/1999/xhtml"><head><link rel="stylesheet" type="text/css" href="insn.css"/><meta name="generator" content="iform.xsl"/><title>BFMOPA (non-widening)</title></head><body><table style="margin: 0 auto;"><tr><td><div class="topbar"><a href="index.html">Base Instructions</a></div></td><td><div class="topbar"><a href="fpsimdindex.html">SIMD&amp;FP Instructions</a></div></td><td><div class="topbar"><a href="sveindex.html">SVE Instructions</a></div></td><td><div class="topbar"><a href="mortlachindex.html">SME Instructions</a></div></td><td><div class="topbar"><a href="encodingindex.html">Index by Encoding</a></div></td><td><div class="topbar"><a href="shared_pseudocode.html">Shared Pseudocode</a></div></td><td><div class="topbar"><a href="notice.html">Proprietary Notice</a></div></td></tr></table><hr/><h2 class="instruction-section">BFMOPA (non-widening)</h2><p>BFloat16 floating-point outer product and accumulate</p>
      <p class="aml">This instruction works with a 16-bit element ZA tile.</p>
      <p class="aml">These instructions generate an outer product of the first source vector and the second source vector. The first source is SVL<sub>H</sub>×1 vector and the second source is 1×SVL<sub>H</sub> vector.</p>
      <p class="aml">Each source vector is independently predicated by a corresponding governing predicate. When either source vector element is Inactive the corresponding destination tile element remains unmodified.</p>
      <p class="aml">The resulting outer product, SVL<sub>H</sub>×SVL<sub>H</sub>, is then destructively added to the destination tile. This is equivalent to performing a single multiply-accumulate to each of the destination tile elements.</p>
      <p class="aml">This instruction follows SME2.1 ZA-targeting non-widening BFloat16 numerical behaviors.</p>
      <p class="aml">ID_AA64SMFR0_EL1.B16B16 indicates whether this instruction is implemented.</p>
    
    <h3 class="classheading"><a id="iclass_mortlach2"/>SME2<span style="font-size:smaller;"><br/>(FEAT_SVE_B16B16)
          </span></h3><p class="desc"/><div class="regdiagram-32"><table class="regdiagram"><thead><tr><td>31</td><td>30</td><td>29</td><td>28</td><td>27</td><td>26</td><td>25</td><td>24</td><td>23</td><td>22</td><td>21</td><td>20</td><td>19</td><td>18</td><td>17</td><td>16</td><td>15</td><td>14</td><td>13</td><td>12</td><td>11</td><td>10</td><td>9</td><td>8</td><td>7</td><td>6</td><td>5</td><td>4</td><td>3</td><td>2</td><td>1</td><td>0</td></tr></thead><tbody><tr class="firstrow"><td class="l">1</td><td class="r">0</td><td class="l">0</td><td>0</td><td>0</td><td>0</td><td>0</td><td>1</td><td>1</td><td>0</td><td class="r">1</td><td colspan="5" class="lr">Zm</td><td colspan="3" class="lr">Pm</td><td colspan="3" class="lr">Pn</td><td colspan="5" class="lr">Zn</td><td class="lr">0</td><td class="lr">1</td><td class="l">0</td><td class="r">0</td><td class="lr">ZAda</td></tr><tr class="secondrow"><td colspan="2"/><td colspan="9"/><td colspan="5"/><td colspan="3"/><td colspan="3"/><td colspan="5"/><td class="droppedname">S</td><td/><td colspan="2"/><td/></tr></tbody></table></div><div class="encoding"><h4 class="encoding"/><a id="bfmopa_za_pp_zz_16"/><p class="asm-code">BFMOPA  <a href="#sa_zada" title="ZA tile ZA0-ZA1 (field &quot;ZAda&quot;)">&lt;ZAda&gt;</a>.H, <a href="#sa_pn" title="First governing scalable predicate register P0-P7 (field &quot;Pn&quot;)">&lt;Pn&gt;</a>/M, <a href="#sa_pm" title="Second governing scalable predicate register P0-P7 (field &quot;Pm&quot;)">&lt;Pm&gt;</a>/M, <a href="#sa_zn" title="First source scalable vector register (field &quot;Zn&quot;)">&lt;Zn&gt;</a>.H, <a href="#sa_zm" title="Second source scalable vector register (field &quot;Zm&quot;)">&lt;Zm&gt;</a>.H</p></div><p class="pseudocode">if !<a href="shared_pseudocode.html#impl-aarch64.HaveSMEB16B16.0" title="function: boolean HaveSMEB16B16()">HaveSMEB16B16</a>() then UNDEFINED;
integer a = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Pn);
integer b = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Pm);
integer n = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Zn);
integer m = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(Zm);
integer da = <a href="shared_pseudocode.html#impl-shared.UInt.1" title="function: integer UInt(bits(N) x)">UInt</a>(ZAda);
boolean sub_op = FALSE;</p>
  <div class="encoding-notes"/><h3 class="explanations">Assembler Symbols</h3><div class="explanations"><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;ZAda&gt;</td><td><a id="sa_zada"/>
        
          <p class="aml">Is the name of the ZA tile ZA0-ZA1, encoded in the "ZAda" field.</p>
        
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Pn&gt;</td><td><a id="sa_pn"/>
        
          <p class="aml">Is the name of the first governing scalable predicate register P0-P7, encoded in the "Pn" field.</p>
        
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Pm&gt;</td><td><a id="sa_pm"/>
        
          <p class="aml">Is the name of the second governing scalable predicate register P0-P7, encoded in the "Pm" field.</p>
        
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Zn&gt;</td><td><a id="sa_zn"/>
        
          <p class="aml">Is the name of the first source scalable vector register, encoded in the "Zn" field.</p>
        
      </td></tr></table><table><col class="asyn-l"/><col class="asyn-r"/><tr><td>&lt;Zm&gt;</td><td><a id="sa_zm"/>
        
          <p class="aml">Is the name of the second source scalable vector register, encoded in the "Zm" field.</p>
        
      </td></tr></table></div><div class="syntax-notes"/>
    <div class="ps"><a id="execute"/><h3 class="pseudocode">Operation</h3>
      <p class="pseudocode"><a href="shared_pseudocode.html#impl-aarch64.CheckStreamingSVEAndZAEnabled.0" title="function: CheckStreamingSVEAndZAEnabled()">CheckStreamingSVEAndZAEnabled</a>();
constant integer VL = <a href="shared_pseudocode.html#impl-aarch64.CurrentVL.read.none" title="accessor: integer CurrentVL">CurrentVL</a>;
constant integer PL = VL DIV 8;
constant integer dim = VL DIV 16;
bits(PL) mask1 = <a href="shared_pseudocode.html#impl-aarch64.P.read.2" title="accessor: bits(width) P[integer n, integer width]">P</a>[a, PL];
bits(PL) mask2 = <a href="shared_pseudocode.html#impl-aarch64.P.read.2" title="accessor: bits(width) P[integer n, integer width]">P</a>[b, PL];
bits(VL) operand1 = <a href="shared_pseudocode.html#impl-aarch64.Z.read.2" title="accessor: bits(width) Z[integer n, integer width]">Z</a>[n, VL];
bits(VL) operand2 = <a href="shared_pseudocode.html#impl-aarch64.Z.read.2" title="accessor: bits(width) Z[integer n, integer width]">Z</a>[m, VL];
bits(dim*dim*16) operand3 = <a href="shared_pseudocode.html#impl-aarch64.ZAtile.read.3" title="accessor: bits(width) ZAtile[integer tile, integer esize, integer width]">ZAtile</a>[da, 16, dim*dim*16];
bits(dim*dim*16) result;

for row = 0 to dim-1
    for col = 0 to dim-1
        bits(16) element1 = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand1, row, 16];
        bits(16) element2 = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand2, col, 16];
        bits(16) element3 = <a href="shared_pseudocode.html#impl-shared.Elem.read.3" title="accessor: bits(size) Elem[bits(N) vector, integer e, integer size]">Elem</a>[operand3, row*dim+col, 16];

        if <a href="shared_pseudocode.html#impl-aarch64.ActivePredicateElement.3" title="function: boolean ActivePredicateElement(bits(N) pred, integer e, integer esize)">ActivePredicateElement</a>(mask1, row, 16) &amp;&amp; <a href="shared_pseudocode.html#impl-aarch64.ActivePredicateElement.3" title="function: boolean ActivePredicateElement(bits(N) pred, integer e, integer esize)">ActivePredicateElement</a>(mask2, col, 16) then
            if sub_op then element1 = <a href="shared_pseudocode.html#impl-shared.BFNeg.1" title="function: bits(N) BFNeg(bits(N) op)">BFNeg</a>(element1);
            <a href="shared_pseudocode.html#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, row*dim+col, 16] = <a href="shared_pseudocode.html#impl-shared.BFMulAdd_ZA.4" title="function: bits(N) BFMulAdd_ZA(bits(N) addend, bits(N) op1, bits(N) op2, FPCRType fpcr_in)">BFMulAdd_ZA</a>(element3, element1, element2, FPCR[]);
        else
            <a href="shared_pseudocode.html#impl-shared.Elem.write.3" title="accessor: Elem[bits(N) &amp;vector, integer e, integer size] = bits(size) value">Elem</a>[result, row*dim+col, 16] = element3;

<a href="shared_pseudocode.html#impl-aarch64.ZAtile.write.3" title="accessor: ZAtile[integer tile, integer esize, integer width] = bits(width) value">ZAtile</a>[da, 16, dim*dim*16] = result;</p>
    </div>
  <hr/><table style="margin: 0 auto;"><tr><td><div class="topbar"><a href="index.html">Base Instructions</a></div></td><td><div class="topbar"><a href="fpsimdindex.html">SIMD&amp;FP Instructions</a></div></td><td><div class="topbar"><a href="sveindex.html">SVE Instructions</a></div></td><td><div class="topbar"><a href="mortlachindex.html">SME Instructions</a></div></td><td><div class="topbar"><a href="encodingindex.html">Index by Encoding</a></div></td><td><div class="topbar"><a href="shared_pseudocode.html">Shared Pseudocode</a></div></td><td><div class="topbar"><a href="notice.html">Proprietary Notice</a></div></td></tr></table><p class="versions">
      Internal version only: isa v33.62, AdvSIMD v29.12, pseudocode v2023-03_rel, sve v2023-03_rc3b
      ; Build timestamp: 2023-03-31T11:36
    </p><p class="copyconf">
      Copyright © 2010-2023 Arm Limited or its affiliates. All rights reserved.
      This document is Non-Confidential.
    </p></body></html>
